/* **************************************************************
NAME: yemenmhctable.ado 
CREATED: 30 September 2021
EDITED: 30 September 2021 
AUTHOR: callan.corcoran@kellogg.northwestern.edu
PURPOSE: Program to create a multiple hypothesis correction table
 for the Yemen project 
************************************************************** */

cap prog drop yemenmhctable

// ****************************************************************
// yemenmhctable
//
// Builds a one-column-per-outcome treatment-effect table with
// multiple-hypothesis-corrected q-values and exports it as a CSV.
//
// The program makes two passes over the outcome list:
//   1. Regress each outcome on treatment, collect the treatment
//      p-values in matrix P, and compute q-values with -qqvalue-
//      (methods simes and holm).
//   2. Re-run the same regressions and store the formatted cells
//      (coefficient, SE, control mean/SD, N, p- and q-values, ...)
//      as e() locals on placeholder estimates exported by -esttab-.
//
// Arguments
//   varlist        Outcome variables. Any "_end" in a name is removed
//                  to obtain the stem; the regression always uses
//                  <stem>_end as the dependent variable.
//   [if]           Optional sample restriction applied to every
//                  regression.
//
// Options
//   filename()     Output file stem (default "TableX"). Suffixes
//                  _win / _blv / _ctl are appended when winsorize /
//                  baselinevals / addlcontrols are specified.
//   foldername()   Output folder (default ${rep_output}).
//   title()        Table title.
//   footnote()     Footnote text; notes about winsorizing, baseline
//                  controls and additional controls are appended.
//   baselinevals   Control for <stem>_bsl and m_<stem>_bsl.
//   winsorize      Use the "_win1" versions of outcomes and controls.
//   addlcontrols() Additional controls; each control x is entered
//                  together with m_x.
//   fixedeffects() Absorbed variable for -areg- (default village).
//   stderrors()    vce() specification (default robust).
//
// Requires
//   Globals ${rep_analysis}, ${rep_data} and (for the default folder)
//   ${rep_output}; commands eststo/estadd/esttab, svmat2 and qqvalue.
//
// Side effects
//   Clears the data in memory and all stored estimates, saves
//   ${rep_data}/<filename>_mht_pvals.dta, leaves matrices P and T
//   defined, and changes the working directory to the output folder.
// ****************************************************************

prog def yemenmhctable
syntax varlist [if/], 	///
	[filename(string)] 		///
	[foldername(string)]	///
	[title(string)] 		///
	[footnote(string)] 		///
	[baselinevals] 			///
	[winsorize] 			///
	[addlcontrols(varlist)] ///
	[fixedeffects(varlist)] ///
	[stderrors(string)]

// **************************************************************
// PREPARE TABLE INPUTS 			

// FOLDER NAME 
// 	Set default folder to Replication/Output if not specified 
if "`foldername'"=="" {
	loc foldername "${rep_output}" 
}

// FILE NAME 
// 	Set a default filename, then tag it with the options in use 
if "`filename'"=="" {
	loc filename "TableX"
}
if "`winsorize'"=="winsorize" {
	loc filename "`filename'_win"
}
if "`baselinevals'"=="baselinevals" {
	loc filename "`filename'_blv"
}
if "`addlcontrols'"!="" {
	loc filename "`filename'_ctl"
}

// OUTCOME LIST 
//	Rebuild varlist as a list of outcome stems: "_end" is removed and, 
//	if "winsorize" is selected, "_win1" is appended 
loc templist `varlist'
loc varlist // Clear varlist 
foreach var in `templist' {
	loc currvar = subinstr("`var'","_end","",.) // Cut out "_end" suffix 
	if "`winsorize'"=="winsorize" {
		loc varlist `varlist' `currvar'_win1
	}
	else {
		loc varlist `varlist' `currvar'
	}
}

// ADDITIONAL CONTROLS 
//	Each control is entered together with its "m_" companion variable. 
//	If "winsorize" is selected, "_bsl" is replaced by "_win1_bsl" first. 
loc templist `addlcontrols'
loc addlcontrols // Clear addlcontrols 
foreach var in `templist' {
	if "`winsorize'"=="winsorize" {
		loc currvar = subinstr("`var'","_bsl","_win1_bsl",.) // Insert "_win1" before "_bsl"
		loc addlcontrols `addlcontrols' `currvar' m_`currvar'
	}
	else {
		// Accumulate into addlcontrols; the outcome list must not be touched here 
		loc addlcontrols `addlcontrols' `var' m_`var'
	}
}

// FIXED EFFECTS 
// 	Set fixed effects to village, if not specified 
if "`fixedeffects'"=="" {
	loc fixedeffects village 
}

// STANDARD ERRORS 
//	Default standard errors to robust 
if "`stderrors'"=="" {
	loc stderrors robust 
}

// IF-STATEMENT 
//	syntax [if/] returns the expression without the leading "if" 
if "`if'"!="" {
	loc ifif "if `if'"
} 

// FOOTNOTES 
// 	Add footnotes about winsorizing, baseline values and additional controls
if "`winsorize'"=="winsorize" {
	loc footnote "`footnote' Variables winsorized at 1%."
}
if "`baselinevals'"=="baselinevals" {
	loc footnote "`footnote' Controls for baseline value of dependent variable."
}
if "`addlcontrols'"!="" {
	loc footnote "`footnote' Controls for additional variables."
}

// **************************************************************
// CREATE P-VALUE TABLE FRAMEWORK 

// Clear data and estimates 
clear
eststo clear
estimates drop _all 

// Count outcomes and set up the matrix of p-values (one row per outcome)
loc columns: word count `varlist' 

matrix P = J(`columns', 1, .) 		// Set empty matrix for P-values 
matrix colnames P = "pval"			// Set column name 
matrix rownames P = `varlist'		// Set row names 
	
use "${rep_analysis}.dta", clear
	
// **************************************************************
// REPLACE MISSINGS WITH ZEROS

// ADDITIONAL CONTROLS 
// 	Replace missings with zeroes 
foreach var in `addlcontrols' {
	replace `var' = 0 if missing(`var') // Replace to 0 if missing 
}

// **************************************************************
// FIRST PASS: COLLECT TREATMENT P-VALUES 

loc i = 1 // Row of P being filled 

foreach y_var in `varlist' { // Loop through table variables 
	
	//***********************************************************
	// BASELINE CONTROLS 

	// Check whether baseline value exists 
	cap confirm variable `y_var'_bsl
	
	// If baseline doesn't exist, create constant stand-ins so that 
	// the variable names can still be referenced 
	if _rc { 
		gen `y_var'_bsl = 1 			// Create stand-in baseline var
		gen m_`y_var'_bsl = 1 			// Create stand-in baseline dummy var
	}
	
	// If baseline exists, replace missings with zeroes 
	else {
		replace `y_var'_bsl = 0 if missing(`y_var'_bsl) // Replace baseline to 0 if missing 	
	}
	
	if "`baselinevals'"=="baselinevals" local blvals `y_var'_bsl m_`y_var'_bsl // Add to baseline variables local 
	else local blvals // Empty
	
	//***********************************************************
	// RUN REGRESSION 

	// MAIN / DEFAULT REGRESSION 
	qui areg `y_var'_end treatment `blvals' `addlcontrols' `ifif', absorb(`fixedeffects') vce(`stderrors') 
	loc coef "treatment"
	
	// Save the treatment p-value, looked up by row/column name 
	mat def A = r(table)
	matrix P[`i', 1] = el(A,rownumb(A,"pvalue"),colnumb(A,"`coef'"))
	mat drop A
	
	loc ++i

} // End foreach y_var 

// Turn the matrix of p-values into a dataset 		
clear 

svmat2 P, names(col) rnames(outcome)		// This command saves matrices as a dataset	

// qqvalue: install from SSC only if it is not already available 
cap which qqvalue
if _rc ssc install qqvalue

qqvalue pval, method(simes) qvalue(q_FDR)
qqvalue pval, method(holm) qvalue(q_FWER)

save "${rep_data}/`filename'_mht_pvals.dta", replace 

// Matrix T holds p- and q-values, with outcome stems as row names 
mkmat pval q_FDR q_FWER, matrix(T) rown(outcome) 
	
// **************************************************************
// CREATE MAIN TABLE FRAMEWORK 
	
// Clear data and estimates 
clear
eststo clear
estimates drop _all 

**# Bookmark #1
// Create blank dataset for the placeholder regressions 
set obs 10
gen x = 1
gen y = 1

// Count outcomes and create one placeholder estimate per table column 
loc columns: word count `varlist' 

forval i = 1/`columns' { 
	eststo col`i': qui reg x y
}

// Set table cell locals: each holds the index k of the e(stat<k>) 
// local that carries that table row 
loc j = 1
loc treatcoef = `j'		// Treatment starred coefficient
loc ++j
loc treatse = `j'		// Treatment standard error 
loc ++j
loc contmean = `j'		// Control mean 
loc ++j
loc contsd = `j'		// Control standard deviation 
loc ++j
loc obs = `j'			// Observations 
loc ++j
loc blcontrols  = `j'	// Binary for baseline controls 
loc ++j
loc numzero = `j'		// Share with outcome == 0
loc ++j
loc pvalz = `j'			// P-Value 
loc ++j 
loc qFDRz = `j'			// FDR (Q-Value, Simes method)
loc ++j 
loc qFWERz = `j'		// FWER (Q-value, Holm method)

loc stats "" 			// Added scalars to be filled
loc col_titles "" 		// Labels for columns vars to be filled

use "${rep_analysis}.dta", clear

// **************************************************************
// REPLACE MISSINGS WITH ZEROS

// ADDITIONAL CONTROLS 
// 	Replace missings with zeroes 
foreach var in `addlcontrols' {
	replace `var' = 0 if missing(`var') // Replace to 0 if missing 
}

// **************************************************************
// SECOND PASS: FILL TABLE CELLS 
//	Table cells include coefficients, standard errors, p-values 

loc i = 1 // Table column being filled 

foreach y_var in `varlist' { // Loop through table variables 
	
	//***********************************************************
	// BASELINE CONTROLS 

	// Check whether baseline value exists 
	cap confirm variable `y_var'_bsl
	
	// If baseline doesn't exist... 
	if _rc { 
		gen `y_var'_bsl = 1 			// Create stand-in baseline var
		gen m_`y_var'_bsl = 1 			// Create stand-in baseline dummy var
		estadd loc stat`blcontrols' = "No" : col`i' // Note that baseline controls not used
	}
	
	// If baseline exists... 
	else {
		replace `y_var'_bsl = 0 if missing(`y_var'_bsl) // Replace baseline to 0 if missing 	
		if ("`baselinevals'"=="baselinevals") estadd loc stat`blcontrols' = "Yes" : col`i' // Note baseline controls used 
		else estadd loc stat`blcontrols' = "No" : col`i' // Note baseline controls not used 
	}
	
	if "`baselinevals'"=="baselinevals" local blvals `y_var'_bsl m_`y_var'_bsl // Add to baseline variables local 
	else local blvals // Empty
	
	//***********************************************************
	// RUN REGRESSION 
	
	// MAIN / DEFAULT REGRESSION 
	qui areg `y_var'_end treatment `blvals' `addlcontrols' `ifif', absorb(`fixedeffects') vce(`stderrors') 
	loc coef "treatment"
	
	//***********************************************************
	// SAVE STATISTICS
		
	// Save results to matrix A for later use
	mat def A = r(table)
	
	// Add share of estimation-sample observations with outcome equal to zero
	qui count if `y_var'_end == 0 & e(sample) == 1
	loc top = r(N)
	qui count if  e(sample) == 1
	loc bot = r(N)
	estadd loc stat`numzero' = string(`top'/`bot',"%9.2f"): col`i'

	// Add treatment coefficient and standard error	
	// Standard error
	loc se = el(A,rownumb(A,"se"),colnumb(A,"`coef'"))
	estadd loc stat`treatse' = "(" + string(`se',"%9.2f") + ")" : col`i' //Add standard error
		
	// P-value: to get stars
	local thisp = el(A,rownumb(A,"pvalue"),colnumb(A,"`coef'"))
	
	if `thisp' < 0.01 {
		loc bstar "***"
	}
	else if `thisp' < 0.05 {
		loc bstar "**"
	}
	else if `thisp' < 0.1 {
		loc bstar "*"
	}
	else {
		local bstar ""
	}

	// Assign to coefficient
	loc coefficient = el(A,rownumb(A,"b"),colnumb(A,"`coef'"))
	estadd loc stat`treatcoef' = string(`coefficient',"%9.2f") + "`bstar'": col`i'

	// Look up this outcome's p and q values in matrix T 
	loc p_val = el(T,rownumb(T,"`y_var'"),colnumb(T,"pval"))
	loc q_FDR_val = el(T,rownumb(T,"`y_var'"),colnumb(T,"q_FDR"))
	loc q_FWER_val = el(T,rownumb(T,"`y_var'"),colnumb(T,"q_FWER"))
	
	estadd loc stat`pvalz' = string(`p_val',"%9.2f"): col`i'
	estadd loc stat`qFDRz' = string(`q_FDR_val',"%9.2f"): col`i'
	estadd loc stat`qFWERz' = string(`q_FWER_val',"%9.2f"): col`i'
	
	// Control group mean and SD
	qui sum `y_var'_end if treatment == 0  & e(sample) == 1
	estadd loc stat`contmean' = string(`r(mean)', "%9.2f"): col`i'
	estadd loc stat`contsd' = string(`r(sd)', "%9.2f") : col`i'
		
	// Total observations
	qui sum `y_var'_end if e(sample) == 1
	estadd loc stat`obs' = string(`r(N)', "%9.0f"): col`i'
	
	loc ++i
	mat drop A
	
	// Column title: variable label of the dependent variable 
	loc thisvarlabel: variable label `y_var'_end
	local col_titles "`col_titles' "`thisvarlabel'" "		
	
}
// end foreach y_var
	
// Set stats to include in table (stat1 ... stat`j')
forv i = 1/`j' {
	loc stats "`stats' stat`i' "
}
// end forv i

	
//***********************************************************
// EXPORT TABLE 

// Call output folder directory 
// NOTE: the working directory stays changed after the program ends 
cd "`foldername'"

esttab col* using "`filename'.csv", title("`title'") cells(none) ///
	nonum mtitle(`col_titles') stats(`stats',labels("Treatment" " " "Control Mean" "Control SD" "Observations (Total)"  ///
	"Controls for Baseline Values" "Proportion of Obs Equal Zero" "p-Value" "FDR (Simes Method) q-Value" "FWER (Holm Method) q-Value")) ///
	note("`footnote'") compress wrap replace
	
end 

